
library(rio)
library(ggplot2)
library(texreg)
library(tidyverse)
library(AER)
library(broom)

##set the working directory for the replication archive
working_dir <- "~/Dropbox/dueling project/2nd_paper/replication archive/"

pos <- readRDS(paste0(working_dir,"Data/PostOfficesByDecade_county.Rds"))
POgrowth <-
  pos %>% 
  group_by(decade) %>% summarise(post = sum(numPost)) %>% 
  mutate(post_lag = lag(post), pct_increase = 100*(post - post_lag)/post_lag)

POgrowth$pop_increase <- c(35.1, 36.38, 33.13, 33.49, 32.67, 35.87,
                           35.58, 22.63, 30.16, 25.48)

POgrowth$pop <- c(5308483,
                  7239881,	
                  9638453,	
                  12866020,	
                  17069453,	
                  23191876,	
                  31443321,	
                  38558371,	
                  50189209,	
                  62979766)

POgrowth$PO_per_cap <- 1000*POgrowth$post/POgrowth$pop

POgrowth <- reshape2::melt(POgrowth[,c("decade", "pct_increase", "pop_increase")], id.vars = "decade", factorsAsStrings = FALSE)
POgrowth$varname <- NA
POgrowth$varname[POgrowth$variable == "pct_increase"] <- "% Increase in Post Offices"
POgrowth$varname[POgrowth$variable == "pop_increase"] <- "% Increase in Population"

POgrowthPlot <-
  POgrowth %>% 
  filter(decade >= 1810) %>%
  ggplot(., aes(decade, value, linetype = varname)) +
  geom_line() + 
  xlab("Decade") +
  ylab("Growth (%) in Post Offices and Population\nSince Previous Decade, 1800-1890") +
  theme_bw() +
  labs(linetype = "") +
  theme(legend.position = "bottom",
        legend.title = element_text(size = 20),
        axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20),
        legend.text = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        strip.text.x = element_text(size = 20))


ggsave(POgrowthPlot, 
       file = paste0(working_dir,"Replication Plots/POgrowthPlot.pdf"), 
       width = 14, height = 7)

##now focus on geography
POgrowth_region <-
  pos %>% 
  group_by(state, decade) %>% 
  summarise(post = sum(numPost)) %>% 
  mutate(post = ifelse(post == 0, NA, post)) %>%
  mutate(post_lag = lag(post), pct_increase = 100*(post - post_lag)/post_lag) %>%
  ungroup() %>%
  group_by(decade) %>%
  mutate(post_rank = rank(post, na.last = "keep"), maxRank = max(post_rank, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(state, decade) %>% 
  group_by(state) %>% 
  mutate(post_rank_lag = lag(post_rank), maxRank_lag = lag(maxRank), firstPost = min(decade[!is.na(post)])) %>%
  ungroup()


POgrowth_region %>%
  mutate(post1850 = (decade > 1850)*1, timeTrend = (decade - 1800)/10) %>%
  filter(decade >= 1810, !is.na(post), firstPost < 1850) %>%
  nest(-state) %>%
  mutate(
    fit = purrr::map(data, ~ lm(I(post_rank/maxRank - post_rank_lag/maxRank_lag) ~ post1850*timeTrend, data = .x)),
    tidied = purrr::map(fit, tidy)
  ) %>%
  unnest(tidied) %>%
  filter(term == "post1850:timeTrend")

postGrowthState <-
  POgrowth_region %>% 
  filter(decade >= 1810, !is.na(post)) %>%
  ggplot(., aes(decade, post_rank/maxRank)) +
  geom_line() + 
  xlab("Decade") +
  ylab("Relative Ranks of Post Offices by State, 1800-1890\n(Normalized to the Unit Interval)") +
  theme_bw() +
  facet_wrap(~ state) + 
  # stat_smooth(method = "lm", formula = y ~ x + I(x > 1850)) +
  geom_vline(xintercept = 1850, linetype = "dashed") +
  theme(legend.title = element_text(size = 15),
        axis.text.x = element_text(size = 15), axis.text.y = element_text(size = 15),
        legend.text = element_text(size = 15),
        axis.title.x = element_text(size = 15),
        axis.title.y = element_text(size = 15),
        strip.text.x = element_text(size = 15))

ggsave(postGrowthState, 
       file = paste0(working_dir,"Replication Plots/POgrowthStatePlot.pdf"), 
       width = 14, height = 10)
